library(knitr)
knitr::opts_chunk$set(cache = TRUE, warning = FALSE,
message = FALSE, cache.lazy = FALSE, cache.path = "../../gitignore/RmdCache/", # keep heavy data in gitignore cache
fig.path = "Rmdfig/")
machine="mythinkpad" # define the machine we work on
loadALL = TRUE # load all uniteCov objects
loadannot = TRUE # load genome annotations
sourceDMS = TRUE # Load the calculated DMS
sourceSubUnite = FALSE
source("R02.3_DATALOAD.R")
## So far, we don't use DMR (because RRBS single end, this is not super meaningful)
rm(DMRBPlist, DMRlist)
Data getting loaded:
Compare fitness traits between the different offsprings groups. Follow up of Sagonas 2020 & Ferre Ortega’s master’s dissertation
Kaufmann et al. 2014: Body condition of the G2 fish, an estimate of fish health and a predictor of energy reserves and reproductive success, was calculated using there residuals from the regression of body mass on body length (Chellappaet al.1995).
fullMetadata_OFFS$BCI <- residuals(lmer(Wnettofin ~ Slfin * Sex + (1|brotherPairID), data=fullMetadata_OFFS))
## and for parents (no sex difference, only males):
fullMetadata_PAR$BCI <- residuals(lmer(Wnettofin ~ Slfin + (1|brotherPairID), data=fullMetadata_PAR))
Effect of paternal treatment on body condition of offspring: Kaufmann et al. 2014: “To investigate in which way paternal G1 exposure affected offspring tolerance, we tested how the relationship between G2 body condition and infection intensity was affected by paternal G1 exposure. This was tested in a linear mixed model on G2 body condition with paternal G1 treatment and the interaction between paternal G1 treatment and G2 infection intensity as fixed effects. Maternal half-sibship identity was set as a random effect”
Effect of treatment groups of offspring on body condition(Kaufmann et al. 2014): “The linear mixed effect model (nlme function in R) included G2 body condition as dependent variable, sex, G2 treatment (exposed vs. control), paternal G1 treatment (exposed vs. control) and their interactions as fixed effects as well as maternal G2 half-sibship identity as a random effect”
mod1 <- lme(BCI ~ offsTrt * patTrt, random=~1|brotherPairID,data=fullMetadata_OFFS)
anova(mod1) # strong significant effect of both offspring trt & paternal + interactions
## numDF denDF F-value p-value
## (Intercept) 1 100 0.000000 1.0000
## offsTrt 1 100 14.057746 0.0003
## patTrt 1 100 13.600342 0.0004
## offsTrt:patTrt 1 100 9.396573 0.0028
mod1.2 <- lme(BCI ~ trtG1G2, random=~1|brotherPairID,data=fullMetadata_OFFS)
## pairwise posthoc test
emmeans(mod1.2, list(pairwise ~ trtG1G2), adjust = "tukey")
## $`emmeans of trtG1G2`
## trtG1G2 emmean SE df lower.CL upper.CL
## NE_control 16.6 10.8 7 -8.87 42.0
## NE_exposed -57.7 11.0 7 -83.63 -31.8
## E_control 23.6 10.8 7 -1.85 49.0
## E_exposed 15.5 10.8 7 -9.90 41.0
##
## Degrees-of-freedom method: containment
## Confidence level used: 0.95
##
## $`pairwise differences of trtG1G2`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed 74.29 15.3 100 4.840 <.0001
## NE_control - E_control -7.03 15.2 100 -0.462 0.9671
## NE_control - E_exposed 1.03 15.2 100 0.067 0.9999
## NE_exposed - E_control -81.32 15.3 100 -5.298 <.0001
## NE_exposed - E_exposed -73.27 15.3 100 -4.773 <.0001
## E_control - E_exposed 8.05 15.2 100 0.529 0.9517
##
## Degrees-of-freedom method: containment
## P value adjustment: tukey method for comparing a family of 4 estimates
## Control father - treatment offspring has a strongly significantly lower BC than
## every other group, same as Kaufmann et al. 2014
myplot1 <- ggplot(fullMetadata_OFFS, aes(x=trtG1G2, y = BCI, fill=trtG1G2))+
geom_boxplot()+
geom_signif(comparisons = list(c("NE_control", "NE_exposed")),
map_signif_level=TRUE, annotations="***",
y_position = 150, tip_length = 0, vjust=0.4) +
geom_signif(comparisons = list(c("NE_exposed", "E_control")),
map_signif_level=TRUE, annotations="***",
y_position = 200, tip_length = 0, vjust=0.4) +
geom_signif(comparisons = list(c("NE_exposed", "E_exposed")),
map_signif_level=TRUE, annotations="***",
y_position = 250, tip_length = 0, vjust=0.4) +
scale_fill_manual(values = colOffs)+
theme_bw() + theme(legend.position = "none") +
ylab("Body Condition Index") +
scale_x_discrete(labels=c("NE_control" = "G1 control\nG2 control", "NE_exposed" = "G1 control\nG2 infected",
"E_control" = "G1 infected\nG2 control", "E_exposed" = "G1 infected\nG2 infected"),
name = NULL)
myplot1
mod_Tol <- lmer(BCI ~ No.Worms*PAT + (1|brotherPairID)+ (1|Sex), data=fullMetadata_OFFS, REML = F)
## Model selection:
step(mod_Tol, reduce.random = F) # Model found: full model
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 7 -608.33 1230.7
## (1 | brotherPairID) 0 6 -608.33 1228.7 0 1 1
## (1 | Sex) 0 6 -608.33 1228.7 0 1 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## No.Worms:PAT 0 20660 20660 1 111 6.1283 0.01481 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## BCI ~ No.Worms * PAT + (1 | brotherPairID) + (1 | Sex)
## The slope of BCI on nbrworms varies upon treatment
plot(ggpredict(mod_Tol, terms = c("No.Worms", "PAT")), add.data=T)+ theme_bw() +
ylab("Body Condition Index") + xlab("Number of worms") +
ggtitle("Predicted values of Body Condition Index in offspring")+
scale_color_manual(NULL, values = c("black", "red")) +
scale_fill_manual(NULL, values = c("black", "red")) +
scale_x_continuous(breaks = 0:10)+
geom_point(size=0)+ # to have color key in legend as point
guides(colour = guide_legend(override.aes = list(size=3,linetype=0, fill = NA)))
Calculate number of methylated sites, mean coverage, and residuals of methylated sites by covered sites (to account for coverage bias)
mycalcRMS <- function(myUniteCov, myMetaData){
percMethMat = methylKit::percMethylation(myUniteCov)
# create a dataframe with all info
percMethDF = data.frame(SampleID = colnames(percMethMat),
Nbr_methCpG = colSums(percMethMat>=70 & !is.na(percMethMat)), ## number of methylated sites
Nbr_coveredCpG = colSums(!is.na(percMethMat)), ## number of sites covered in this sample
Nbr_NOTcoveredCpG = colSums(is.na(percMethMat)),## number of sites NOT covered in this sample
MeanCoverage = colMeans(methylKit::getData(myUniteCov)[,myUniteCov@coverage.index], na.rm = T), ## coverage.index: vector denoting which columns in the data correspond to coverage values
OverallPercentageMethylation = colMeans(methylKit::percMethylation(myUniteCov), na.rm = T))
## RMS in this sample based on covered sites
percMethDF$RMS_coveredCpG = percMethDF$Nbr_methCpG / percMethDF$Nbr_coveredCpG
## merge with original metadata:
myMetaData = merge(myMetaData, percMethDF)
# calculate also RMS global, considering CpG covered or not (to compare)
myMetaData$RMS_allCpG_coveredOrNot = myMetaData$Nbr_methCpG / (myMetaData$M.Seqs_rawReads*10e6)
# calculate residuals of nbr of methCpG by nbr of covered CpG
myMetaData$res_Nbr_methCpG_Nbr_coveredCpG = residuals(
lm(myMetaData$Nbr_methCpG ~ myMetaData$Nbr_coveredCpG))
## REORDER myMetaData by sample ID
myMetaData = myMetaData[order(as.numeric(gsub("S", "", myMetaData$SampleID))),]
return(myMetaData)
}
fullMetadata <- mycalcRMS(uniteCovALL_woSexAndUnknowChr, fullMetadata)
fullMetadata_PAR <- mycalcRMS(uniteCov6_G1_woSexAndUnknowChrOVERLAP, fullMetadata_PAR)
fullMetadata_OFFS <- mycalcRMS(uniteCov14_G2_woSexAndUnknowChrOVERLAP, fullMetadata_OFFS)
We kept in total 135 samples. On average, 11.27 [+/- 0.33] million reads were sequenced. The average mapping efficiency was 85% [+/- 0.48%].
The mean coverage per CpG site in the full dataset, considering positions covered in all fish, is 83 [+/- 2.21].
For G1 only: We kept in total 24 samples. On average, 11.16 [+/- 0.67] million reads were sequenced. The average mapping efficiency was # 84% [+/- 1.39%].
The mean coverage per CpG site in G1, considering positions shared by at least 6 fish per treatment group (half individuals per group), and which overlap with positions retained in G2, is 46 [+/- 1.71].
For G2 only: We kept in total 111 samples. On average, 11.29 [+/- 0.38] million reads were sequenced. The average mapping efficiency was 86% [+/- 0.47%].
The mean coverage per CpG site in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, is 47 [+/- 0.76].
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
fullMetadata_PAR$Nbr_methCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$Nbr_methCpG
## S = 350, p-value = 2.153e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8478261
## S = 350, p-value = 2.15e-06, rho = 0.85
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
geom_smooth(method = "lm", col="black")+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")
## Check after RMS correction for coverage bias: CORRECTED (p-value = 0.4485)
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
fullMetadata_PAR$RMS_coveredCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$RMS_coveredCpG
## S = 2712, p-value = 0.4006
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1791304
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=RMS_coveredCpG))+
geom_smooth(method = "lm", col="black")+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")
## and with residuals: COMPLETELY CORRECTED p-value = 0.9562
cor.test(fullMetadata_PAR$Nbr_coveredCpG,
fullMetadata_PAR$res_Nbr_methCpG_Nbr_coveredCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_PAR$Nbr_coveredCpG and fullMetadata_PAR$res_Nbr_methCpG_Nbr_coveredCpG
## S = 2308, p-value = 0.9886
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.003478261
ggplot(fullMetadata_PAR, aes(x=Nbr_coveredCpG, y=res_Nbr_methCpG_Nbr_coveredCpG))+
geom_smooth(method = "lm", col="black")+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = c("grey", "red")) +
theme_bw() + ggtitle(label = "Parents, CpG shared by half fish/trt")
############
## Offspring:
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
fullMetadata_OFFS$Nbr_methCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$Nbr_methCpG
## S = 20254, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9111355
## S = 20254, p-value < 2.2e-16 rho = 0.91
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
geom_smooth(method = "lm", col="black")+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
scale_x_continuous("Number of cytosines covered") +
scale_y_continuous("Number of methylated cytosines") +
theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")
## Plot distance to residuals:
fit <- lm(Nbr_methCpG ~ Nbr_coveredCpG, data = fullMetadata_OFFS)
plotdf <- fullMetadata_OFFS
plotdf$predicted <- predict(fit) # Save the predicted values
plotdf$residuals <- residuals(fit)
ggplot(plotdf, aes(x=Nbr_coveredCpG, y=Nbr_methCpG))+
geom_smooth(method = "lm", col="black")+
geom_segment(aes(xend = Nbr_coveredCpG, yend = predicted), col = "grey") +
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
scale_x_continuous("Number of cytosines covered") +
scale_y_continuous("Number of methylated cytosines") +
theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")
## Check after RMS correction for coverage bias: SEMI CORRECTED (p-value = 0.01, rho = -0.24)
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
fullMetadata_OFFS$RMS_coveredCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$RMS_coveredCpG
## S = 282466, p-value = 0.01158
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2393208
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=RMS_coveredCpG))+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
geom_smooth(method = "lm", col="black")+
theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")
## and with residuals: COMPLETELY CORRECTED p-value = 0.51
cor.test(fullMetadata_OFFS$Nbr_coveredCpG,
fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: fullMetadata_OFFS$Nbr_coveredCpG and fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG
## S = 213674, p-value = 0.514
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.06250439
ggplot(fullMetadata_OFFS, aes(x=Nbr_coveredCpG, y=res_Nbr_methCpG_Nbr_coveredCpG))+
geom_point(aes(col=trtG1G2), size = 3)+ scale_color_manual(values = colOffs) +
geom_smooth(method = "lm", col="black")+
scale_x_continuous("Number of cytosines covered") +
scale_y_continuous("Residuals of number of methylated cytosines\n on number of cytosines covered") +
theme_bw() + ggtitle(label = "Offspring, CpG shared by half fish/trt")
mod = lm(MappingEfficiency.BSBoldvsGynogen ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=203.17
## MappingEfficiency.BSBoldvsGynogen ~ Sex
##
## Df Sum of Sq RSS AIC
## <none> 667.74 203.18
## - Sex 1 22.563 690.30 204.86
##
## Call:
## lm(formula = MappingEfficiency.BSBoldvsGynogen ~ Sex, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9847 -1.5030 0.3133 2.0418 4.0823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.3134 0.3337 258.624 <2e-16 ***
## SexM -0.9017 0.4699 -1.919 0.0576 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.475 on 109 degrees of freedom
## Multiple R-squared: 0.03269, Adjusted R-squared: 0.02381
## F-statistic: 3.683 on 1 and 109 DF, p-value: 0.05758
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is WITH unknown and sex chromosomes, before filtering.
mod = lm(M.Seqs_rawReads ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=158.08
## M.Seqs_rawReads ~ Sex
##
## Df Sum of Sq RSS AIC
## - Sex 1 3.8569 448.66 157.04
## <none> 444.80 158.08
##
## Step: AIC=157.04
## M.Seqs_rawReads ~ 1
##
## Call:
## lm(formula = M.Seqs_rawReads ~ 1, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4901 -1.1901 -0.2901 0.8599 8.8099
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.2901 0.1917 58.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.02 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is WITH unknown and sex chromosomes, before filtering.
mod = lm(MeanCoverage ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=314.87
## MeanCoverage ~ Sex
##
## Df Sum of Sq RSS AIC
## - Sex 1 4.4425 1831.0 313.14
## <none> 1826.5 314.87
##
## Step: AIC=313.14
## MeanCoverage ~ 1
##
## Call:
## lm(formula = MeanCoverage ~ 1, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.8662 -2.4123 -0.6565 1.7138 15.0594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.8858 0.3872 121.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.08 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)
mod = lm(Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=2579.96
## Nbr_coveredCpG ~ Sex
##
## Df Sum of Sq RSS AIC
## - Sex 1 1.9317e+10 1.3495e+12 2579.6
## <none> 1.3302e+12 2580.0
##
## Step: AIC=2579.56
## Nbr_coveredCpG ~ 1
##
## Call:
## lm(formula = Nbr_coveredCpG ~ 1, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -338266 -58536 26350 82661 139983
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 853897 10513 81.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 110800 on 110 degrees of freedom
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)
mod = lm(OverallPercentageMethylation ~ Sex, data = fullMetadata_OFFS)
summary(step(mod))
## Start: AIC=154.63
## OverallPercentageMethylation ~ Sex
##
## Df Sum of Sq RSS AIC
## <none> 431.18 154.63
## - Sex 1 12.428 443.61 155.78
##
## Call:
## lm(formula = OverallPercentageMethylation ~ Sex, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9467 -1.2674 -0.4101 0.6060 10.3697
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.1035 0.2682 209.196 <2e-16 ***
## SexM -0.6692 0.3776 -1.772 0.0791 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.989 on 109 degrees of freedom
## Multiple R-squared: 0.02801, Adjusted R-squared: 0.0191
## F-statistic: 3.142 on 1 and 109 DF, p-value: 0.07911
plot(ggpredict(mod, terms = c("Sex")), add.data = T)
NB: this is in G2, considering positions shared by at least 14 fish per treatment group (half individuals per group), and which overlap with positions retained in G1, without sex and unknown chromosome (after filtering)
mod = lm(res_Nbr_methCpG_Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
summary(step(mod)) # sex is significant p = 0.000157 ***
## Start: AIC=2165.93
## res_Nbr_methCpG_Nbr_coveredCpG ~ Sex
##
## Df Sum of Sq RSS AIC
## <none> 3.1917e+10 2165.9
## - Sex 1 4493411296 3.6411e+10 2178.6
##
## Call:
## lm(formula = res_Nbr_methCpG_Nbr_coveredCpG ~ Sex, data = fullMetadata_OFFS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38952 -10289 -519 10434 45882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6420 2307 2.782 0.006361 **
## SexM -12726 3248 -3.917 0.000157 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17110 on 109 degrees of freedom
## Multiple R-squared: 0.1234, Adjusted R-squared: 0.1154
## F-statistic: 15.35 on 1 and 109 DF, p-value: 0.0001565
anova(mod)
## Analysis of Variance Table
##
## Response: res_Nbr_methCpG_Nbr_coveredCpG
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 4.4934e+09 4493411296 15.345 0.0001565 ***
## Residuals 109 3.1917e+10 292820190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(ggpredict(mod, terms = c("Sex")), add.data = T) +
xlab(NULL)+
ylab("Residuals of N methylated sites on N covered sites") +
ggtitle("Predicted values of global methylation in offspring")
fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG_div1000 <- (fullMetadata_OFFS$res_Nbr_methCpG_Nbr_coveredCpG)/1000
mod_Tol.Meth <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*No.Worms*PAT + (1|brotherPairID)+ (1|Sex),
data=fullMetadata_OFFS, REML = F)
## Model selection:
step(mod_Tol.Meth, reduce.random = F) # Model found: BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 11 -606.89 1235.8
## (1 | brotherPairID) 0 10 -606.89 1233.8 0 1 1
## (1 | Sex) 0 10 -606.89 1233.8 0 1 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT 1 2780.8 2780.8
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 2 2396.3 2396.3
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 3 1829.8 1829.8
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 4 2571.6 2571.6
## No.Worms:PAT 0 20659.8 20659.8
## NumDF DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT 1 111 0.8465 0.35953
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 1 111 0.7240 0.39667
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 1 111 0.5492 0.46020
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 1 111 0.7681 0.38270
## No.Worms:PAT 1 111 6.1283 0.01481
##
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms:PAT
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms
## res_Nbr_methCpG_Nbr_coveredCpG_div1000
## No.Worms:PAT *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## The slope of BCI on nbrworms varies upon treatment but methylation does NOT vary with tolerance
mod_Tol.Meth <- lmer(BCI ~ No.Worms*PAT + (1|brotherPairID)+ (1|Sex),
data=fullMetadata_OFFS)
## And by treatment instead of No.worms?
mod_Tol.Meth2 <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*PAT*outcome + (1|brotherPairID)+ (1|Sex),
data=fullMetadata_OFFS, REML = F)
## Model selection:
step(mod_Tol.Meth2, reduce.random = F) # Model found: BCI ~ PAT + outcome + (1 | brotherPairID) + (1 | Sex) + PAT:outcome
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 11 -603.06 1228.1
## (1 | brotherPairID) 0 10 -603.06 1226.1 0 1 1
## (1 | Sex) 0 10 -603.06 1226.1 0 1 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome 1 2156.6 2156.6
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome 2 494.6 494.6
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 3 1034.5 1034.5
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 4 2542.3 2542.3
## PAT:outcome 0 30432.9 30432.9
## NumDF DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome 1 111 0.7034 0.403442
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome 1 111 0.1603 0.689644
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 1 111 0.3348 0.564008
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 1 111 0.8203 0.367046
## PAT:outcome 1 111 9.7478 0.002289
##
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT:outcome
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:outcome
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT
## res_Nbr_methCpG_Nbr_coveredCpG_div1000
## PAT:outcome **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## BCI ~ PAT + outcome + (1 | brotherPairID) + (1 | Sex) + PAT:outcome
The slope of BCI on nbr worms varies upon parental treatment, but methylation does NOT vary with tolerance
## By group, tolerance slope as a function of methylation residuals:
modFULL <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*No.Worms + (1|brotherPairID) + (1|Sex),
data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2 %in% c("NE_exposed", "E_exposed"),])
## Model selection:
step(modFULL, reduce.random = F) # Model found: BCI ~ (1 | brotherPairID) + (1 | Sex)
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 7 -293.63 601.26
## (1 | brotherPairID) 0 6 -293.63 599.26 0.000000 1 1.0000
## (1 | Sex) 0 6 -293.66 599.32 0.067386 1 0.7952
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 1 362.1 362.1 1
## No.Worms 2 577.4 577.4 1
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 3 8726.5 8726.5 1
## DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:No.Worms 49.047 0.1098 0.7418
## No.Worms 51.973 0.1776 0.6752
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 37.954 2.7327 0.1066
##
## Model found:
## BCI ~ (1 | brotherPairID) + (1 | Sex)
modFULL <- lmer(BCI ~ res_Nbr_methCpG_Nbr_coveredCpG_div1000*PAT + (1|brotherPairID) + (1|Sex),
data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2 %in% c("NE_exposed", "E_exposed"),])
## Model selection:
step(modFULL, reduce.random = F) # Model found: BCI ~ PAT + (1 | brotherPairID) + (1 | Sex)
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 7 -278.14 570.27
## (1 | brotherPairID) 0 6 -278.14 568.27 0.0000 1 1.0000
## (1 | Sex) 0 6 -278.93 569.86 1.5904 1 0.2073
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 1 504 504 1
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 2 1476 1476 1
## PAT 0 76908 76908 1
## DenDF F value Pr(>F)
## res_Nbr_methCpG_Nbr_coveredCpG_div1000:PAT 50.451 0.2624 0.6107
## res_Nbr_methCpG_Nbr_coveredCpG_div1000 51.749 0.7782 0.3818
## PAT 47.592 41.0412 6.195e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## BCI ~ PAT + (1 | brotherPairID) + (1 | Sex)
# 1. get raw values
percmeth = percMethylation(uniteCov14_G2_woSexAndUnknowChrOVERLAP)
# Run PCA on complete data (CpG covered in all fish)
PCA_allpos <- myPCA(x = t(na.omit(percmeth)), incomplete = FALSE)
## [1] "The chosen model is:"
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## <environment: 0x55d44397b050>
We perform a PCA on 78384 CpG sites covered in all G2 individuals. We first perform a test on the complete dataset.
fviz_pca_ind(PCA_allpos$res.PCA, label="none", habillage=PCA_allpos$metadata$trtG1G2) +
scale_color_manual(values = colOffs)+
scale_shape_manual(values=c(19,19,19,19))
fviz_pca_ind(PCA_allpos$res.PCA, label="none", habillage=as.factor(PCA_allpos$metadata$brotherPairID))
# The function dimdesc() can be used to identify the most correlated variables with a given principal component.
mydimdesc <- dimdesc(PCA_allpos$res.PCA, axes = c(1,2), proba = 0.05)
There are 36212 CpG sites most correlated (p < 0.05) with the first principal component , and 15101 with the second principal component.
The 2 first PCA axes do not explain BCI (p<0.05)
# Percentage of variance explained by each factor:
formula(PCA_allpos$modSel) # BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## BCI ~ No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) + No.Worms:PAT
## <environment: 0x55d44397b050>
mod_noworms = lmer(BCI ~ PAT + (1 | brotherPairID) + (1 | Sex), data = PCA_allpos$metadata)
mod_noPAT = lmer(BCI ~ No.Worms + (1 | brotherPairID) + (1 | Sex), data = PCA_allpos$metadata)
# R2c conditional R2 value associated with fixed effects plus the random effects.
A = (MuMIn::r.squaredGLMM(PCA_allpos$modSel)[2] -
MuMIn::r.squaredGLMM(mod_noworms)[2])*100
B = (MuMIn::r.squaredGLMM(PCA_allpos$modSel)[2] -
MuMIn::r.squaredGLMM(mod_noPAT)[2])*100
# Set up scatterplot
scatterplot <- ggplot(fullMetadata_OFFS,
aes(x = res_Nbr_methCpG_Nbr_coveredCpG,
y = BCI, fill=trtG1G2)) +
geom_point(pch=21, size =3, alpha = .8) +
guides(color = "none") +
scale_fill_manual(values = colOffs, name = "Treatment",
labels = c("G1 control - G2 control", "G1 control - G2 exposed", "G1 exposed - G2 control", "G1 exposed - G2 exposed")) +
theme(plot.margin = margin()) + theme_bw() +
theme(legend.position = "none") +
xlab("Methylation residuals (methylated sites/coverage")+
ylab("Body Condition Index")
# Define marginal histogram
marginal_distribution <- function(x, var, group) {
ggplot(x, aes_string(x = var, fill = group)) +
# geom_histogram(bins = 30, alpha = 0.4, position = "identity") +
geom_density(alpha = 0.6, size = 0.2) +
guides(fill = "none") +
scale_fill_manual(values = colOffs) +
theme_void() +
theme(plot.margin = margin())
}
# Set up marginal histograms
x_hist <- marginal_distribution(fullMetadata_OFFS, "res_Nbr_methCpG_Nbr_coveredCpG", "trtG1G2")
y_hist <- marginal_distribution(fullMetadata_OFFS, "BCI", "trtG1G2") +
coord_flip()
# Align histograms with scatterplot
aligned_x_hist <- align_plots(x_hist, scatterplot, align = "v")[[1]]
aligned_y_hist <- align_plots(y_hist, scatterplot, align = "h")[[1]]
# Arrange plots
cowplot::plot_grid(
aligned_x_hist, NULL, scatterplot, aligned_y_hist, ncol = 2, nrow = 2, rel_heights = c(0.2, 1), rel_widths = c(1, 0.2)
)
Methylation profiles, CpG present in all fish
Methylation profile for the 55530 CpG sites covered in all G1 & G2 fish (N = 135:
makePrettyMethCluster(uniteCovALL_woSexAndUnknowChr, fullMetadata,
my.cols.trt=c("#333333ff","#ff0000ff","#ffe680ff","#ff6600ff","#aaccffff","#aa00d4ff"),
my.cols.fam = c(1:4), nbrk = 8)
Methylation profile for the 78384 CpG sites covered in all G2 fish (N = 111:
makePrettyMethCluster(uniteCovALL_G2_woSexAndUnknowChr, fullMetadata_OFFS,
my.cols.trt=c("#ffe680ff","#ff6600ff", "#aaccffff", "#aa00d4ff"),
my.cols.fam = c(1:4), nbrk = 8)
# Save
pdf(file = "../../dataOut/clusterALLCpG_offspings.pdf", width = 10, height = 4)
makePrettyMethCluster(uniteCovALL_G2_woSexAndUnknowChr, fullMetadata_OFFS,
my.cols.trt=c("#ffe680ff","#ff6600ff", "#aaccffff", "#aa00d4ff"),
my.cols.fam = c(1:4), nbrk = 8)
dev.off()
## png
## 2
Permutational multivariate analysis of variance (PERMANOVA) is a non-parametric multivariate statistical test. It is used to compare groups of objects and test the null hypothesis that the centroids and dispersion of the groups as defined by measure space are equivalent for all groups.
# make distance matrix with B-C distances
data.dist = makeDatadistFUN(uniteCovALL_G2_woSexAndUnknowChr)
## Adonis test: importance of each predictor
adonis2(data.dist ~ PAT * outcome * Sex * brotherPairID, data = fullMetadata_OFFS)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = data.dist ~ PAT * outcome * Sex * brotherPairID, data = fullMetadata_OFFS)
## Df SumOfSqs R2 F Pr(>F)
## PAT 1 0.002782 0.01470 1.8292 0.001 ***
## outcome 1 0.001909 0.01009 1.2552 0.030 *
## Sex 1 0.002418 0.01277 1.5895 0.003 **
## brotherPairID 7 0.028018 0.14805 2.6316 0.001 ***
## PAT:outcome 1 0.001680 0.00888 1.1045 0.127
## PAT:Sex 1 0.001492 0.00789 0.9813 0.525
## outcome:Sex 1 0.001557 0.00823 1.0240 0.312
## PAT:brotherPairID 7 0.014344 0.07580 1.3473 0.001 ***
## outcome:brotherPairID 7 0.010591 0.05596 0.9947 0.506
## Sex:brotherPairID 7 0.010394 0.05492 0.9763 0.652
## PAT:outcome:Sex 1 0.001453 0.00768 0.9555 0.603
## PAT:outcome:brotherPairID 7 0.010351 0.05470 0.9722 0.719
## PAT:Sex:brotherPairID 7 0.010249 0.05415 0.9626 0.731
## outcome:Sex:brotherPairID 6 0.008749 0.04623 0.9587 0.742
## PAT:outcome:Sex:brotherPairID 1 0.001127 0.00596 0.7413 1.000
## Residual 54 0.082132 0.43399
## Total 110 0.189247 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results: family of the father (brotherPairID) explains more than 14% of the variance in methylation.
To focus on G1 and G2 treatments, we define the permutation structure considering brother pairs (N = 8), and use a PERMANOVA to test the hypothesis that paternal treatment, offspring treatment and their interactions significantly influencing global methylation.
perm <- how(nperm = 1000) # 1000 permutations
setBlocks(perm) <- with(fullMetadata_OFFS, brotherPairID) # define the permutation structure considering brotherPairID and sex
print(adonis2(data.dist ~ PAT * outcome * Sex, data = fullMetadata_OFFS, permutations = perm))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: with(fullMetadata_OFFS, brotherPairID)
## Permutation: free
## Number of permutations: 1000
##
## adonis2(formula = data.dist ~ PAT * outcome * Sex, data = fullMetadata_OFFS, permutations = perm)
## Df SumOfSqs R2 F Pr(>F)
## PAT 1 0.002782 0.01470 1.6344 0.000999 ***
## outcome 1 0.001909 0.01009 1.1216 0.038961 *
## Sex 1 0.002418 0.01277 1.4203 0.003996 **
## PAT:outcome 1 0.001716 0.00907 1.0080 0.099900 .
## PAT:Sex 1 0.001740 0.00920 1.0224 0.192807
## outcome:Sex 1 0.001703 0.00900 1.0004 0.341658
## PAT:outcome:Sex 1 0.001653 0.00874 0.9712 0.329670
## Residual 103 0.175326 0.92644
## Total 110 0.189247 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results:
#### RUN Goodness of fit
myGOF.NMDS.FUN(dataset = uniteCovALL_G2_woSexAndUnknowChr)
Goodness of fit for NMDS suggested the presence of six dimensions with a stress value > 0.1 and 2 with > 0.2
## to find the seed that allows convergence:
# sapply(3:10, function(x) myNMDS(dataset = uniteCovALL_G2_woSexAndUnknowChr, metadata = fullMetadata_OFFS, myseed = x))
NMDSanalysis <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr, metadata = fullMetadata_OFFS, myseed = 4)
NMDSanalysis$NMDSplot
# Save
pdf(file = "../../dataOut/NMDSplot_allG2.pdf", width = 9, height = 11)
NMDSanalysis$NMDSplot
dev.off()
## png
## 2
AdonisWithinG1trtFUN(trtgp = c(2,3))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: with(fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp,
## Permutation: free
## Number of permutations: 1000
##
## adonis2(formula = data.dist ~ outcome + Sex + brotherPairID, data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp, ], permutations = perm)
## Model: adonis0(formula = ~outcome + Sex + brotherPairID, data = data, method = method)
## Df SumOfSqs R2 F Pr(>F)
## outcome 1 0.001475 0.01622 1.0073 0.486513
## Sex 1 0.002094 0.02302 1.4301 0.000999 ***
## brotherPairID 7 0.020026 0.22018 1.9537 0.001998 **
## Residual 46 0.067357 0.74058
## Total 55 0.090952 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results:
NMDSanalysis_G1control <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr,
metadata = fullMetadata_OFFS, myseed = 25,
byParentTrt=TRUE,
trtgp = c(2,3))
#png(filename = "../../dataOut/NMDSplot_G1fromControlG2.png", width = 900, height = 900)
NMDSanalysis_G1control$NMDSplot
#dev.off()
AdonisWithinG1trtFUN(trtgp = c(5,6))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: with(fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp,
## Permutation: free
## Number of permutations: 1000
##
## adonis2(formula = data.dist ~ outcome + Sex + brotherPairID, data = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% trtgp, ], permutations = perm)
## Model: adonis0(formula = ~outcome + Sex + brotherPairID, data = data, method = method)
## Df SumOfSqs R2 F Pr(>F)
## outcome 1 0.002160 0.02262 1.4047 0.006993 **
## Sex 1 0.002053 0.02150 1.3349 0.192807
## brotherPairID 7 0.022076 0.23117 2.0507 0.019980 *
## Residual 45 0.069205 0.72470
## Total 54 0.095494 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results:
NMDSanalysis_G1infected <- myNMDSFUN(dataset = uniteCovALL_G2_woSexAndUnknowChr,
metadata = fullMetadata_OFFS, myseed = 25,
byParentTrt=TRUE,
trtgp = c(5,6))
#png(filename = "../../dataOut/NMDSplot_G1fromInfectedG2.png", width = 900, height = 900)
NMDSanalysis_G1infected$NMDSplot
#dev.off()
Differential methylation by brother pair (sex as covariate):
## All DMS are stored in DMSBPlist by brother pair
## Vector of all 4 comparisons
vecCompa <- c("CC_TC", "CT_TT", "CC_CT", "TC_TT")
vecCompaVerbose <- c("Control offspring in control vs infected parents", "Infected offspring in control vs infected parents", "Control vs infected offspring from control parent", "Control vs infected offspring from infected parent") # this is useful in plots
## Extract DMS for all 4 comparisons (by position)
myPosList = lapply(DMSBPlist, lapply, function(x){paste(x$chr, x$end)})
## Prepare df for complexUpset
getUpsetDF = function(i, NBP){ # for a given comparison
A = get2keep(vecCompa[i], NBP)
A2 = lapply(A, function(x){
x = data.frame(x) # vector of DMS as df
names(x) = "DMS" # name each CpG
return(x)
})
## Add BP name
for (i in 1:length(names(A2))){
A2[[i]]["BP"] = names(A2)[i]
}
# make a dataframe
A2 = A2 %>% reduce(full_join, by = "DMS")
# names column with BP id
for (i in 2:ncol(A2)) {names(A2)[i] = unique(A2[!is.na(A2[i]), i])}
# replace by 0 or 1 the DMS absence/presence
A = data.frame(apply(A2[2:9], 2, function(x) ifelse(is.na(x), 0, 1)))
# add DMS
A$DMS = A2$DMS
return(A)
}
Make upset plots for DMS found in at least 6 BP:
for (i in 1:4){
df = getUpsetDF(i, NBP = 6)
print(ComplexUpset::upset(
df,
names(df)[1:8],
width_ratio=0.1,
themes=upset_default_themes(text=element_text(size=15)),
sort_intersections_by=c('degree', 'cardinality'),
queries=query_by_degree(
df, names(df)[1:8],
params_by_degree=list(
'1'=list(color='red', fill='red'),
'2'=list(color='purple', fill='purple'),
'3'=list(color='blue', fill='blue'),
'4'=list(color='grey', fill='grey'),
'5'=list(color='red', fill='red'),
'6'=list(color='purple', fill='purple'),
'7'=list(color='blue', fill='blue'),
'8'=list(color='green', fill='green')
),
only_components=c("intersections_matrix", "Intersection size")
)) + ggtitle(paste0("Differentially methylated sites found in more than six brother pairs in the comparison: \n", vecCompaVerbose[i]))) #+ theme(plot.title = element_text(size = 30)))
}
PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons INTERACTION effects: DMS found in CC-CT which show a differential methylation (not necessarily significant) in the opposite direction in TC-TT, or inversely
## Subselect those DMS present in at least 4 out of 8 BP
get2keep = function(Compa, NBP = 4){
x <- lapply(myPosList, function(x){unlist(x[[paste0("DMS_15pc_BP_", Compa)]])})
f <- table(unlist((x))) # each DMS present between 1 and 8 times
tokeep <- names(f)[f >= NBP]
# print(length(tokeep))
## Keep the DMS present in 4 families minimum
DMSBPlist_INTER4 <- lapply(x, function(x){x[x %in% tokeep]})
## Reorder by family:
DMSBPlist_INTER4 <- DMSBPlist_INTER4[names(DMSBPlist_INTER4)[order(names(DMSBPlist_INTER4))]]
return(DMSBPlist_INTER4)
}
# PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons
DMS_PaternalEffect_4BPmin = unique(c(unlist(get2keep(vecCompa[1], NBP = 4)), unlist(get2keep(vecCompa[2], NBP = 4))))
# OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons
DMS_OffspringEffect_4BPmin = unique(c(unlist(get2keep(vecCompa[3], NBP = 4)), unlist(get2keep(vecCompa[4], NBP = 4))))
# INTERACTION effects: DMS found in CC-CT which show a differential methylation in the opposite direction in TC-TT, or inversely (reaction norm are inversed)
setDMSInversionSlope = getInteractionDMS() # in customfunctions.R
# Plot a Venn diagram
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin, "Offspring effect" = DMS_OffspringEffect_4BPmin, "InversionSlope" = setDMSInversionSlope),
label_alpha = 0) +
scale_fill_gradient(low="white",high = "red") + theme(legend.position = "none")
# Save:
pdf(file = "../../dataOut/DMS3groupsVenn.pdf", width = 7, height = 6)
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin, "Offspring effect" = DMS_OffspringEffect_4BPmin, "InversionSlope" = setDMSInversionSlope),
label_alpha = 0) +
scale_fill_gradient(low="white",high = "red") + theme(legend.position = "none")
dev.off()
## png
## 2
# The five cases of Venn diagram:
caseVennG1only = DMS_PaternalEffect_4BPmin[!DMS_PaternalEffect_4BPmin %in% DMS_OffspringEffect_4BPmin]
caseVennG2only = DMS_OffspringEffect_4BPmin[!DMS_OffspringEffect_4BPmin %in% DMS_PaternalEffect_4BPmin &
!DMS_OffspringEffect_4BPmin %in% setDMSInversionSlope]
caseVennG1G2NOinter = DMS_OffspringEffect_4BPmin[DMS_OffspringEffect_4BPmin %in% DMS_PaternalEffect_4BPmin &
!DMS_OffspringEffect_4BPmin %in% setDMSInversionSlope]
caseVennG1G2inter = DMS_OffspringEffect_4BPmin[DMS_OffspringEffect_4BPmin %in% DMS_PaternalEffect_4BPmin &
DMS_OffspringEffect_4BPmin %in% setDMSInversionSlope]
caseVennG2interNOG1 = DMS_OffspringEffect_4BPmin[!DMS_OffspringEffect_4BPmin %in% DMS_PaternalEffect_4BPmin &
DMS_OffspringEffect_4BPmin %in% setDMSInversionSlope]
The size of the different sections of the Venn diagram are as follows:
Plot the reaction norms with real data as a validation of principle:
plots = makePlotsobservedReactionNorms()
pdf("../../dataOut/caseVennG1only.pdf", width = 15, height = 10)
plots$plot1
dev.off()
## png
## 2
pdf("../../dataOut/caseVennG2only.pdf", width = 15, height = 10)
plots$plot2
dev.off()
## png
## 2
pdf("../../dataOut/caseVennG1G2NOinter.pdf", width = 15, height = 10)
plots$plot3
dev.off()
## png
## 2
pdf("../../dataOut/caseVennG1G2inter.pdf", width = 15, height = 10)
plots$plot4
dev.off()
## png
## 2
pdf("../../dataOut/caseVennG2interNOG1.pdf", width = 15, height = 10)
plots$plot5
dev.off()
## png
## 2
## Venn diagram by genes found in each comparisons
# PARENTAL effect: DMS found in either CC-TC or CT-TT comparisons
DMS_PaternalEffect_4BPmin_GENES = getAnnotationFun(DMSdf = DMS_PaternalEffect_4BPmin,
annotBed12 = annotBed12, annotGff3 = annotGff3, isDMDaDataframeWithBP = FALSE)
# OFFSPRING effect: DMS found in either CC-CT or TC-TT comparisons
DMS_OffspringEffect_4BPmin_GENES = getAnnotationFun(DMSdf = DMS_OffspringEffect_4BPmin,
annotBed12 = annotBed12, annotGff3 = annotGff3, isDMDaDataframeWithBP = FALSE)
# INTERACTION effects: DMS found in CC-CT which show a differential methylation in the opposite direction in TC-TT, or inversely (reaction norm are inversed)
setDMSInversionSlope_GENES = getAnnotationFun(DMSdf = setDMSInversionSlope,
annotBed12 = annotBed12, annotGff3 = annotGff3, isDMDaDataframeWithBP = FALSE)
# Plot a Venn diagram
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin_GENES$Name, "Offspring effect" = DMS_OffspringEffect_4BPmin_GENES$Name,
"InversionSlope" = setDMSInversionSlope_GENES$Name),
label_alpha = 0) +
scale_fill_gradient(low="white",high = "red") + theme(legend.position = "none") + ggtitle("Genes in each effect")
# Save:
pdf(file = "../../dataOut/DMS3groupsVenn_GENES.pdf", width = 7, height = 6)
ggVennDiagram(list("Paternal effect" = DMS_PaternalEffect_4BPmin_GENES$Name, "Offspring effect" = DMS_OffspringEffect_4BPmin_GENES$Name,
"InversionSlope" = setDMSInversionSlope_GENES$Name),
label_alpha = 0) +
scale_fill_gradient(low="white",high = "red") + theme(legend.position = "none") + ggtitle("Genes in each effect")
dev.off()
## png
## 2
# The five cases of Venn diagram:
annot_caseVennG1only = DMS_PaternalEffect_4BPmin_GENES[!DMS_PaternalEffect_4BPmin_GENES$Name %in% DMS_OffspringEffect_4BPmin_GENES$Name,]
annot_caseVennG2only = DMS_OffspringEffect_4BPmin_GENES[!DMS_OffspringEffect_4BPmin_GENES$Name %in% DMS_PaternalEffect_4BPmin_GENES$Name &
!DMS_OffspringEffect_4BPmin_GENES$Name %in% setDMSInversionSlope_GENES$Name,]
annot_caseVennG1G2NOinter = DMS_OffspringEffect_4BPmin_GENES[DMS_OffspringEffect_4BPmin_GENES$Name %in% DMS_PaternalEffect_4BPmin_GENES$Name &
!DMS_OffspringEffect_4BPmin_GENES$Name %in% setDMSInversionSlope_GENES$Name,]
annot_caseVennG1G2inter = DMS_OffspringEffect_4BPmin_GENES[DMS_OffspringEffect_4BPmin_GENES$Name %in% DMS_PaternalEffect_4BPmin_GENES$Name &
DMS_OffspringEffect_4BPmin_GENES$Name %in% setDMSInversionSlope_GENES$Name,]
annot_caseVennG2interNOG1 = DMS_OffspringEffect_4BPmin_GENES[!DMS_OffspringEffect_4BPmin_GENES$Name %in% DMS_PaternalEffect_4BPmin_GENES$Name &
DMS_OffspringEffect_4BPmin_GENES$Name %in% setDMSInversionSlope_GENES$Name,]
namesCases = c("G1", "G2", "G1+G2", "G1:G2", "G2noG1")
casesVec=c("annot_caseVennG1only", "annot_caseVennG2only", "annot_caseVennG1G2NOinter", "annot_caseVennG1G2inter", "annot_caseVennG2interNOG1")
plotAndsave <- function(i){
P=plotManhattanGenesDMS4BP(annotFile = get(casesVec[i]),
GYgynogff = GYgynogff, isBPinfo = FALSE,
myxlab = paste0("Genes linked with the ", namesCases[[i]], " effect"))
print(P)
pdf(paste0("../../dataOut/Manhattan", namesCases[[i]], ".pdf"), width = 16, height = 5)
P
}
annot_caseVennG1G2inter[annot_caseVennG1G2inter$chrom %in% "Gy_chrI",]
## GeneSymbol Name seqid source type start end strand
## 16 OR11A1 gasAcul06751-RA Gy_chrI maker mRNA 163569 164495 +
## 133 VEGFC gasAcul07707-RA Gy_chrI maker mRNA 25390253 25395728 +
## 174 BARX2 gasAcul06935-RA Gy_chrI maker mRNA 3714027 3722880 -
## 185 SLC8A2 gasAcul07399-RA Gy_chrI maker mRNA 15814742 15835770 +
## 215 PARD3B gasAcul07626-RA Gy_chrI maker mRNA 22467146 22482347 -
## 222 FGF14 gasAcul07676-RA Gy_chrI maker mRNA 24734549 24751466 +
## 252 GRAMD1B gasAcul06941-RA Gy_chrI maker mRNA 3901325 3948148 +
## ID Alias
## 16 gasAcul06751-RA maker-Gy_chrI-snap-gene-0.8-mRNA-1
## 133 gasAcul07707-RA snap_masked-Gy_chrI-processed-gene-84.27-mRNA-1
## 174 gasAcul06935-RA maker-Gy_chrI-snap-gene-12.5-mRNA-1
## 185 gasAcul07399-RA maker-Gy_chrI-snap-gene-52.26-mRNA-1
## 215 gasAcul07626-RA maker-Gy_chrI-augustus-gene-75.6-mRNA-1
## 222 gasAcul07676-RA maker-Gy_chrI-snap-gene-82.58-mRNA-1
## 252 gasAcul06941-RA snap_masked-Gy_chrI-processed-gene-13.2-mRNA-1
## Note
## 16 Similar to OR11A1: Olfactory receptor 11A1 (Homo sapiens OX=9606)
## 133 Similar to VEGFC: Vascular endothelial growth factor C (Homo sapiens OX=9606)
## 174 Similar to Barx2: Homeobox protein BarH-like 2 (Mus musculus OX=10090)
## 185 Similar to Slc8a2: Sodium/calcium exchanger 2 (Mus musculus OX=10090)
## 215 Similar to Pard3b: Partitioning defective 3 homolog B (Mus musculus OX=10090)
## 222 Similar to Fgf14: Fibroblast growth factor 14 (Rattus norvegicus OX=10116)
## 252 Similar to Gramd1b: Protein Aster-B (Mus musculus OX=10090)
## Dbxref
## 16 InterPro:IPR000725, Pfam:PF13853
## 133
## 174 InterPro:IPR001356, Pfam:PF00046
## 185 InterPro:IPR003644, InterPro:IPR004837, InterPro:IPR032452, Pfam:PF01699, Pfam:PF03160, Pfam:PF16494
## 215 InterPro:IPR001478, Pfam:PF00595
## 222 InterPro:IPR002209, Pfam:PF00167
## 252
## Ontology_term Parent X_AED
## 16 GO:0004984, GO:0007186, GO:0016021 gasAcul06751 0.11
## 133 gasAcul07707 0.19
## 174 GO:0003677 gasAcul06935 0.19
## 185 GO:0007154, GO:0016021, GO:0055085 gasAcul07399 0.11
## 215 GO:0005515 gasAcul07626 0.22
## 222 GO:0008083 gasAcul07676 0.20
## 252 gasAcul06941 0.95
## X_QI X_eAED nCpG geneLengthkb nCpGperGenekb
## 16 0|0|0|1|1|1|2|0|248 0.11 1 0.926 1.08
## 133 0|0|0|0.75|1|1|8|0|316 0.11 1 5.475 0.18
## 174 0|0.33|0.5|1|0.66|0.75|4|886|254 0.23 1 8.853 0.11
## 185 0|0|0|1|0.58|0.61|13|0|828 0.12 2 21.028 0.10
## 215 0|0|0|0.87|0.42|0.62|8|0|521 0.22 1 15.201 0.07
## 222 0|0|0|1|0.33|0.75|4|0|174 0.20 1 16.917 0.06
## 252 0|0|0|0.06|1|1|16|0|690 0.95 2 46.823 0.04
## chrom ENTREZID description
## 16 Gy_chrI 26531 olfactory receptor family 11 subfamily A member 1
## 133 Gy_chrI 7424 vascular endothelial growth factor C
## 174 Gy_chrI 8538 BARX homeobox 2
## 185 Gy_chrI 6543 solute carrier family 8 member A2
## 215 Gy_chrI 117583 par-3 family cell polarity regulator beta
## 222 Gy_chrI 2259 fibroblast growth factor 14
## 252 Gy_chrI 57476 GRAM domain containing 1B
## summary
## 16 Olfactory receptors interact with odorant molecules in the nose, to initiate a neuronal response that triggers the perception of a smell. The olfactory receptor proteins are members of a large family of G-protein-coupled receptors (GPCR) arising from single coding-exon genes. Olfactory receptors share a 7-transmembrane domain structure with many neurotransmitter and hormone receptors and are responsible for the recognition and G protein-mediated transduction of odorant signals. The olfactory receptor gene family is the largest in the genome. The nomenclature assigned to the olfactory receptor genes and proteins for this organism is independent of other organisms. [provided by RefSeq, Jul 2008]
## 133 The protein encoded by this gene is a member of the platelet-derived growth factor/vascular endothelial growth factor (PDGF/VEGF) family. The encoded protein promotes angiogenesis and endothelial cell growth, and can also affect the permeability of blood vessels. The proprotein is further cleaved into a fully processed form that can bind and activate VEGFR-2 and VEGFR-3 receptors. [provided by RefSeq, Apr 2014]
## 174 This gene encodes a member of the homeobox transcription factor family. A highly related protein in mouse has been shown to influence cellular processes that control cell adhesion and remodeling of the actin cytoskeleton in myoblast fusion and chondrogenesis. The encoded protein may also play a role in cancer progression. [provided by RefSeq, Jul 2008]
## 185
## 215
## 222 The protein encoded by this gene is a member of the fibroblast growth factor (FGF) family. FGF family members possess broad mitogenic and cell survival activities, and are involved in a variety of biological processes, including embryonic development, cell growth, morphogenesis, tissue repair, tumor growth and invasion. A mutation in this gene is associated with autosomal dominant cerebral ataxia. Alternatively spliced transcript variants have been found for this gene. [provided by RefSeq, Jul 2008]
## 252
plotAndsave(1); dev.off()
## png
## 2
plotAndsave(2); dev.off()
## png
## 2
plotAndsave(3); dev.off()
## png
## 2
plotAndsave(4); dev.off()
## png
## 2
plotAndsave(5); dev.off()
## png
## 2
rm(casesVec,namesCases, plotAndsave)
# create gene universe
gene_universe <- data.frame(
subsetByOverlaps(GRanges(annotGff3), GRanges(uniteCov14_G2_woSexAndUnknowChrOVERLAP))) %>% # subselect covered CpGs
filter(lengths(Ontology_term)!=0) %>% # rm non existing GO terms
filter(type %in% "gene") %>% # keep all the 7416 genes with GO terms
dplyr::select(c("Name", "Ontology_term")) %>%
mutate(go_linkage_type = "IEA") %>% #NB: IEA but not necessarily true, it's from Interproscan after Maker. Sticklebacks (biomart) have 82701 IEA and 63 ISS.
relocate("Ontology_term","go_linkage_type","Name") %>%
unnest(Ontology_term) %>% # one GO per line (was a list before in this column)
data.frame()
# Create gene set collection
goFrame <- GOFrame(gene_universe, organism="Gasterosteus aculeatus")
goAllFrame <- GOAllFrame(goFrame)
gsc_universe <- GeneSetCollection(goAllFrame, setType = GOCollection())
IMPORTANT NOTE from Mel: why conditional hypergeometric test? The GO ontology is set up as a directed acyclic graph, where a parent term is comprised of all its child terms. If you do a standard hypergeometric, you might e.g., find ‘positive regulation of kinase activity’ to be significant. If you then test ‘positive regulation of catalytic activity’, which is a parent term, then it might be significant as well, but only because of the terms coming from positive regulation of kinase activity.
The conditional hypergeometric takes this into account, and only uses those terms that were not already significant when testing a higher order (parent) term.
GO_G1only = makedfGO(annot_caseVennG1only, gene_universe, effect = "G1only")
GO_G2only = makedfGO(annot_caseVennG2only, gene_universe, effect = "G2only")
GO_G1G2NOinter = makedfGO(annot_caseVennG1G2NOinter, gene_universe, effect = "G1G2NOinter")
GO_G1G2inter = makedfGO(annot_caseVennG1G2inter, gene_universe, effect = "G1G2inter")
GO_G2interNOG1 = makedfGO(annot_caseVennG2interNOG1, gene_universe, effect = "G2interNOG1")
dfGO = rbind(GO_G1only, GO_G2only, GO_G1G2NOinter, GO_G1G2inter, GO_G2interNOG1)
dfGO %>% ggplot(aes(x=Effect, y = factor(GO.name))) +
geom_point(aes(color = p.value.adjusted, size = genePercent)) +
scale_color_gradient(name="adjusted\np-value", low = "red", high = "blue") +
scale_size_continuous(name = "% of genes")+
theme_bw() + ylab("") + xlab("Treatments comparison") +
theme(legend.box.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
legend.background = element_rect(fill = "#ebebeb", color = "#ebebeb"),
legend.key = element_rect(fill = "#ebebeb", color = "#ebebeb"), legend.position="left") + # grey box for legend
facet_grid(fct_inorder(GO.category)~., scales="free",space = "free")
Approach:
PCA
extract axes 1 & 2
Correlation with parasite load/BCI
source("R05_PCAatDMS.R")
## [1] "The chosen model is:"
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) +
## PCA2:PAT + No.Worms:PAT
## <environment: 0x556480fd7570>
## [1] "The chosen model is:"
## BCI ~ PCA2 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) +
## PCA2:PAT + No.Worms:PAT
## <environment: 0x5564483f8240>
## [1] "The chosen model is:"
## BCI ~ PCA1 + No.Worms + PAT + (1 | brotherPairID) + (1 | Sex) +
## PCA1:PAT + No.Worms:PAT
## <environment: 0x55645e6eeca8>
## Features Annotation (use package genomation v1.24.0)
## NB Promoters are defined by options at genomation::readTranscriptFeatures function.
## The default option is to take -1000,+1000bp around the TSS and you can change that.
## -> following Heckwolf 2020 and Sagonas 2020, we consider 1500bp upstream and 500 bp downstream
## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
par(mfrow=c(1,3))
par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
diffAnn_PAR
## promoter exon intron intergenic
## 8.58 15.84 34.13 45.72
## promoter exon intron intergenic
## 8.58 13.19 32.51 45.72
## promoter exon intron
## 1.07 0.19 0.44
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 3020 8128 15141 19226 300247
genomation::plotTargetAnnotation(diffAnn_PAR,precedence=TRUE, main="DMS G1", cex.legend = 1, border="white")
## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
diffAnn_G2_controlG1
## promoter exon intron intergenic
## 11.28 21.05 30.16 44.44
## promoter exon intron intergenic
## 11.28 16.21 28.07 44.44
## promoter exon intron
## 0.38 0.07 0.14
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11 2602 6295 14212 15906 261017
genomation::plotTargetAnnotation(diffAnn_G2_controlG1,precedence=TRUE, main="DMS G2-G1c", cex.legend = 1, border="white")
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
diffAnn_G2_infectedG1
## promoter exon intron intergenic
## 12.90 13.62 34.64 46.81
## promoter exon intron intergenic
## 12.90 9.42 30.87 46.81
## promoter exon intron
## 0.28 0.03 0.08
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9 2355 7150 12285 14773 109527
genomation::plotTargetAnnotation(diffAnn_G2_infectedG1,precedence=TRUE, main="DMS G2-G1i", cex.legend = 1, border="white")
par(mfrow=c(1,1))
## Run the function to get DMS info
DMS_info_G1 <- myDMSinfo(DMSlist$DMS_15pc_G1_C_T, uniteCov6_G1_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1c_final <- myDMSinfo(DMSlist$DMS_15pc_CC_CT, uniteCov14_G2_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1i_final <- myDMSinfo(DMSlist$DMS_15pc_TC_TT,uniteCov14_G2_woSexAndUnknowChrOVERLAP)
NB Kostas’ results: “We found a total of 1,973 CpG sites out of 1,172,887 CpGs (0.17%) across the genome that showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish”
Here: we obtain out a total of 1001880 CpG sites: 3648 (0.36%) showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish in the parental group; 1197 (0.12%) in the offspring from control parents comparison; 690 (0.07%) in the offspring from infected parents comparison.
## Chi2 test: are the number of DMS from G2-G1C and G2-G1I overlapping with DMSpar statistically different?
A=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1c_final$DMS))
B=length(DMS_info_G2_G1c_final$DMS)
C=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1i_final$DMS))
D=length(DMS_info_G2_G1i_final$DMS)
Observed=matrix(c(A, B-A, C, D-C),nrow=2)
Observed
## [,1] [,2]
## [1,] 106 59
## [2,] 1091 631
chisq.test(Observed)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: Observed
## X-squared = 0.019909, df = 1, p-value = 0.8878
## not statistically different
## output Venn diagrams
allVenn <- ggVennDiagram(list("DMS G1" = DMS_info_G1$DMS, "DMS G2-c" = DMS_info_G2_G1c_final$DMS, "DMS G2-i" = DMS_info_G2_G1i_final$DMS), label_alpha = 0) +
scale_fill_gradient(low="white",high = "red")
hypoVenn <- ggVennDiagram(list("DMS G1\nhypo" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hypo"],
"DMS G2-c\nhypo" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hypo"],
"DMS G2-i\nhypo" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hypo"]), label_alpha = 0) +
scale_fill_gradient(low="white",high = "red")
hyperVenn <- ggVennDiagram(list("DMS G1\nhyper" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hyper"],
"DMS G2-c\nhyper" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hyper"],
"DMS G2-i\nhyper" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hyper"]), label_alpha = 0) +
scale_fill_gradient(low="white",high = "red")
ggarrange(allVenn,
ggarrange(hypoVenn, hyperVenn, ncol = 2, legend = "none"),
nrow = 2, widths = c(.5,1))
runHyperHypoAnnot <- function(){
par(mfrow=c(2,3))
par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
####### HYPO
## Parents comparison:
A = annotateWithGeneParts(
as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(A,precedence=TRUE, main="DMS G1\nhypo",
cex.legend = .4, border="white")
## Offspring from control parents comparison:
B = annotateWithGeneParts(
as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(B,precedence=TRUE, main="DMS G2-G1c\nhypo",
cex.legend = .4, border="white")
## Offspring from infected parents comparison:
C = annotateWithGeneParts(
as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(C,precedence=TRUE, main="DMS G2-G1i\nhypo",
cex.legend = .4, border="white")
####### HYPER
## Parents comparison:
D = annotateWithGeneParts(
as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(D,precedence=TRUE, main="DMS G1\nhyper",
cex.legend = .4, border="white")
## Offspring from control parents comparison:
E = annotateWithGeneParts(
as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(E,precedence=TRUE, main="DMS G2-G1c\nhyper",
cex.legend = .4, border="white")
## Offspring from infected parents comparison:
f = annotateWithGeneParts(
as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(f,precedence=TRUE, main="DMS G2-G1i\nhyper",
cex.legend = .4, border="white")
par(mfrow=c(1,1))
return(list(G1hypo=A, G2G1chypo=B, G2G1ihypo=C, G1hyper=D, G2G1chyper=E, G2G1ihyper=f))
}
myannot=runHyperHypoAnnot()
############################################################
## Venn diagram of overlapping features by their annotation:
table(rowSums(as.data.frame(myannot$G1hypo@members))) # NB: some positions are labelled with several features!
##
## 0 1 2 3
## 559 566 49 2
## as in MBE 2021: "giving precedence to the following order promoters, exons,
## introns, and intergenic regions when features overlapped"
myAnnotateDMS <- function(DMS, annot){
## sanity check
if (nrow(DMS) != nrow(annot)){"STOP error in arguments"}
DMS$pos <- paste(DMS$chr, DMS$start, DMS$end)
## NB as in MBE 2021: "giving precedence to the following order promoters, exons,
## introns, and intergenic regions when features overlapped"
DMS$feature <- NA
## 1. promoters
DMS$feature[which(annot$prom == 1)] = "promoter"
## 2. exons
DMS$feature[which(annot$exon == 1 & annot$prom ==0)] = "exon"
## 3. intron
DMS$feature[which(annot$intro == 1 & annot$exon == 0 & annot$prom ==0)] = "intron"
## 4. intergenic regions
DMS$feature[which(annot$intro == 0 & annot$exon == 0 & annot$prom ==0)] = "intergenic"
return(DMS)
}
DMSlist$DMS_15pc_G1_C_T = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T, as.data.frame(diffAnn_PAR@members))
DMSlist$DMS_15pc_G1_C_T_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],
as.data.frame(myannot$G1hypo@members))
DMSlist$DMS_15pc_G1_C_T_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],
as.data.frame(myannot$G1hyper@members))
DMSlist$DMS_15pc_CC_CT = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT, as.data.frame(diffAnn_G2_controlG1@members))
DMSlist$DMS_15pc_CC_CT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],
as.data.frame(myannot$G2G1chypo@members))
DMSlist$DMS_15pc_CC_CT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],
as.data.frame(myannot$G2G1chyper@members))
DMSlist$DMS_15pc_TC_TT = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT, as.data.frame(diffAnn_G2_infectedG1@members))
DMSlist$DMS_15pc_TC_TT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],
as.data.frame(myannot$G2G1ihypo@members))
DMSlist$DMS_15pc_TC_TT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],
as.data.frame(myannot$G2G1ihyper@members))
## Make Venn diagram for each feature
getFeatureDFHYPO <- function(myfeat){
a = DMSlist$DMS_15pc_G1_C_T_HYPO$pos[DMSlist$DMS_15pc_G1_C_T_HYPO$feature %in% myfeat]
b = DMSlist$DMS_15pc_CC_CT_HYPO$pos[DMSlist$DMS_15pc_CC_CT_HYPO$feature %in% myfeat]
c = DMSlist$DMS_15pc_TC_TT_HYPO$pos[DMSlist$DMS_15pc_TC_TT_HYPO$feature %in% myfeat]
return(list(a=a,b=b,c=c))
}
getFeatureDFHYPER <- function(myfeat){
a = DMSlist$DMS_15pc_G1_C_T_HYPER$pos[DMSlist$DMS_15pc_G1_C_T_HYPER$feature %in% myfeat]
b = DMSlist$DMS_15pc_CC_CT_HYPER$pos[DMSlist$DMS_15pc_CC_CT_HYPER$feature %in% myfeat]
c = DMSlist$DMS_15pc_TC_TT_HYPER$pos[DMSlist$DMS_15pc_TC_TT_HYPER$feature %in% myfeat]
return(list(a=a,b=b,c=c))
}
getVenn <- function(feat, direction){
if (direction == "hypo"){
ggVennDiagram(list(A = getFeatureDFHYPO(feat)[["a"]],
B = getFeatureDFHYPO(feat)[["b"]],
C = getFeatureDFHYPO(feat)[["c"]]), label_alpha = 0,
category.names = c(paste0("DMS G1\nhypo\n", feat), paste0("DMS G2-c\nhypo\n", feat), paste0("DMS G2-i\nhypo\n", feat))) +
scale_fill_gradient(low="white",high = "red")
} else if (direction == "hyper"){
ggVennDiagram(list(A = getFeatureDFHYPER(feat)[["a"]],
B = getFeatureDFHYPER(feat)[["b"]],
C = getFeatureDFHYPER(feat)[["c"]]), label_alpha = 0,
category.names = c(paste0("DMS G1\nhyper\n", feat), paste0("DMS G2-c\nhyper\n", feat), paste0("DMS G2-i\nhyper\n", feat))) +
scale_fill_gradient(low="white",high = "red")
}
}
ggarrange(getVenn("promoter", "hypo"), getVenn("exon", "hypo"),
getVenn("intron", "hypo"), getVenn("intergenic", "hypo"),
nrow = 2, ncol = 2)
ggarrange(getVenn("promoter", "hyper"), getVenn("exon", "hyper"),
getVenn("intron", "hyper"), getVenn("intergenic", "hyper"),
nrow = 2, ncol = 2)
## Parents trt-ctrl
# load annotation
annot_PAR <- as.data.frame(diffAnn_PAR@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T, annotFile = annot_PAR, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")
## G2-G1c trt-ctrl
# load annotation
annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT, annotFile = annot_G2_G1c, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")
## G2-G1i trt-ctrl
# load annotation
annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT, annotFile = annot_G2_G1i, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")
## Outliers in Manhattan plot: 15% diff + 2SD
outliers_G1_final <- which(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff)))
outliers_annot_G1 <- as.data.frame(diffAnn_PAR@members)[outliers_G1_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T[outliers_G1_final, ],
annotFile = outliers_annot_G1, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")
outliers_G2_G1c_final <- which(abs(DMSlist$DMS_15pc_CC_CT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_CC_CT$meth.diff)))
outliers_annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)[outliers_G2_G1c_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT[outliers_G2_G1c_final, ],
annotFile = outliers_annot_G2_G1c, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")
outliers_G2_G1i_final <- which(abs(DMSlist$DMS_15pc_TC_TT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_TC_TT$meth.diff)))
outliers_annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)[outliers_G2_G1i_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT[outliers_G2_G1i_final, ],
annotFile = outliers_annot_G2_G1i, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")
Question: how are the beta values in the different groups at the parental DMS?
##############
## Prepare dataset
PM_G1 <- getPMdataset(uniteCov = uniteCov6_G1_woSexAndUnknowChrOVERLAP, MD = fullMetadata_PAR, gener="parents")
PM_G2 <- getPMdataset(uniteCov = uniteCov14_G2_woSexAndUnknowChrOVERLAP, MD = fullMetadata_OFFS, gener="offspring")
Test of VCA: variance component analysis https://cran.r-project.org/web/packages/VCA/vignettes/VCA_package_vignette.html
PM_G2_mean_hypo <- PM_G2[PM_G2$hypohyper %in% "hypo", ] %>%
group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()
varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo,
MeanLine=list(var=c("G1_trt", "G2_trt"),
col=c("white", "blue"), lwd=c(2,2)),
BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypomethylated upon infection"))
myfitVCA_hypo <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo)
### Real values
trtEffect <- sum(myfitVCA_hypo$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hypo$aov.tab[5:8, 5])
error <- sum(myfitVCA_hypo$aov.tab[9, 5])
realValHypoVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)
### Randomisation
myrandomVCA <- function(df=PM_G2_mean_hypo){
randomDF = df
randomDF$G1_trt = sample(PM_G2_mean_hypo$G1_trt, replace = F)
randomDF$G2_trt = sample(PM_G2_mean_hypo$G2_trt, replace = F)
randomDF$brotherPairID = sample(PM_G2_mean_hypo$brotherPairID, replace = F)
myfitVCA <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = randomDF)
trtEffect <- sum(myfitVCA$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA$aov.tab[5:8, 5])
error <- sum(myfitVCA$aov.tab[9, 5])
return(data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error))
}
# randomHypoVCA = do.call(rbind, lapply(1:1000, function(x) {
# df=myrandomVCA(PM_G2_mean_hypo)
# df$rep=x
# return(df)}))
# randomHypoVCA = melt(randomHypoVCA, id.vars = "rep")
# saveRDS(randomHypoVCA, file = "Rdata/randomHypoVCA.RDS")
randomHypoVCA <- readRDS(file = "Rdata/randomHypoVCA.RDS")
df2=reshape2::melt(realValHypoVCA)
sumDF <- randomHypoVCA %>%
group_by(variable) %>%
dplyr::summarize(value = mean(value)) %>% data.frame()
ggplot(randomHypoVCA, aes(x=variable, y=value))+
geom_boxplot()+
geom_jitter(width=.1, alpha=.2)+
geom_point(data = df2, col = "red", size = 6)+
geom_text(data=sumDF, aes(label=round(value)), col="white")+
geom_text(data = df2, aes(label=round(value)), col="white")+
theme_cleveland()+
ggtitle("VCA with bootstrap N=1000 at hypo-parDMS", subtitle = "red: observed values")
# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
##
##
##
## Inference from (V)ariance (C)omponent (A)nalysis
## ------------------------------------------------
##
## > VCA Result:
## -------------
##
## Name DF SS MS VC %Total SD
## 1 total 6.6733 15.8572 100 3.9821
## 2 G1_trt 1 319.6035 319.6035 4.7515 29.9646 2.1798
## 3 G2_trt 1 3.8987 3.8987 0.0712 0.4491 0.2669
## 4 G1_trt:G2_trt 1 1.5539 1.5539 0* 0* 0*
## 5 brotherPairID 7 129.185 18.455 0* 0* 0*
## 6 G1_trt:brotherPairID 7 395.9735 56.5676 7.8576 49.5525 2.8031
## 7 G2_trt:brotherPairID 7 9.9214 1.4173 0* 0* 0*
## 8 G1_trt:G2_trt:brotherPairID 7 20.2165 2.8881 0* 0* 0*
## 9 error 79 250.9663 3.1768 3.1768 20.0337 1.7824
## CV[%] Var(VC)
## 1 6.6047
## 2 3.6154 66.6443
## 3 0.4426 0.0124
## 4 0* 0.0096
## 5 0* 5.4751
## 6 4.6493 19.7869
## 7 0* 0.068
## 8 0* 0.2383
## 9 2.9562 0.2555
##
## Mean: 60.2922 (N = 111)
##
## Experimental Design: unbalanced | Method: ANOVA | * VC set to 0 | adapted MS used for total DF
##
##
## > VC:
## -----
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 15.8572 6.824 68.824 7.787 53.1883
## G1_trt 4.7515 0* 20.7519 0* 18.1795
## G2_trt 0.0712 0* 0.2897 0* 0.2545
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 7.8576 0* 16.576 0.5409 15.1744
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 3.1768 2.3794 4.457 2.491 4.2163
##
## > SD:
## -----
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 3.9821 2.6123 8.296 2.7905 7.293
## G1_trt 2.1798 0* 4.5554 0* 4.2637
## G2_trt 0.2669 0* 0.5382 0* 0.5045
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 2.8031 0* 4.0714 0.7355 3.8954
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 1.7824 1.5425 2.1112 1.5783 2.0534
##
## > CV[%]:
## --------
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 6.6047 4.3327 13.7597 4.6283 12.0961
## G1_trt 3.6154 0* 7.5556 0* 7.0718
## G2_trt 0.4426 0* 0.8926 0* 0.8368
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 4.6493 0* 6.7527 1.2199 6.4609
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 2.9562 2.5584 3.5015 2.6177 3.4057
##
##
## 95% Confidence Level | CIs for negative VCs excluded | * CI-limits constrained to be >= 0
## SAS PROC MIXED method used for computing CIs
PM_G2_mean_hyper <- PM_G2[PM_G2$hypohyper %in% "hyper", ] %>%
group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()
varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper,
MeanLine=list(var=c("G1_trt", "G2_trt"),
col=c("white", "blue"), lwd=c(2,2)),
BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypermethylated upon infection"))
myfitVCA_hyper <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper)
### Real values
trtEffect <- sum(myfitVCA_hyper$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hyper$aov.tab[5:8, 5])
error <- sum(myfitVCA_hyper$aov.tab[9, 5])
realValHyperVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)
### Randomisation
# randomHyperVCA = do.call(rbind, lapply(1:1000, function(x) {
# df=myrandomVCA(PM_G2_mean_hyper)
# df$rep=x
# return(df)}))
#
# randomHyperVCA = melt(randomHyperVCA, id.vars = "rep")
# saveRDS(randomHyperVCA, file = "Rdata/randomHyperVCA.RDS")
randomHyperVCA <- readRDS(file = "Rdata/randomHyperVCA.RDS")
df2=reshape2::melt(realValHyperVCA)
sumDF <- randomHyperVCA %>%
group_by(variable) %>%
dplyr::summarize(value = mean(value)) %>% data.frame()
ggplot(randomHyperVCA, aes(x=variable, y=value))+
geom_boxplot()+
geom_jitter(width=.1, alpha=.2)+
geom_point(data = df2, col = "red", size = 6)+
geom_text(data=sumDF, aes(label=round(value)), col="white")+
geom_text(data = df2, aes(label=round(value)), col="white")+
theme_cleveland()+
ggtitle("VCA with bootstrap N=1000 at hyper-parDMS", subtitle = "red: observed values")
# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
##
##
##
## Inference from (V)ariance (C)omponent (A)nalysis
## ------------------------------------------------
##
## > VCA Result:
## -------------
##
## Name DF SS MS VC %Total SD
## 1 total 6.6733 15.8572 100 3.9821
## 2 G1_trt 1 319.6035 319.6035 4.7515 29.9646 2.1798
## 3 G2_trt 1 3.8987 3.8987 0.0712 0.4491 0.2669
## 4 G1_trt:G2_trt 1 1.5539 1.5539 0* 0* 0*
## 5 brotherPairID 7 129.185 18.455 0* 0* 0*
## 6 G1_trt:brotherPairID 7 395.9735 56.5676 7.8576 49.5525 2.8031
## 7 G2_trt:brotherPairID 7 9.9214 1.4173 0* 0* 0*
## 8 G1_trt:G2_trt:brotherPairID 7 20.2165 2.8881 0* 0* 0*
## 9 error 79 250.9663 3.1768 3.1768 20.0337 1.7824
## CV[%] Var(VC)
## 1 6.6047
## 2 3.6154 66.6443
## 3 0.4426 0.0124
## 4 0* 0.0096
## 5 0* 5.4751
## 6 4.6493 19.7869
## 7 0* 0.068
## 8 0* 0.2383
## 9 2.9562 0.2555
##
## Mean: 60.2922 (N = 111)
##
## Experimental Design: unbalanced | Method: ANOVA | * VC set to 0 | adapted MS used for total DF
##
##
## > VC:
## -----
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 15.8572 6.824 68.824 7.787 53.1883
## G1_trt 4.7515 0* 20.7519 0* 18.1795
## G2_trt 0.0712 0* 0.2897 0* 0.2545
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 7.8576 0* 16.576 0.5409 15.1744
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 3.1768 2.3794 4.457 2.491 4.2163
##
## > SD:
## -----
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 3.9821 2.6123 8.296 2.7905 7.293
## G1_trt 2.1798 0* 4.5554 0* 4.2637
## G2_trt 0.2669 0* 0.5382 0* 0.5045
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 2.8031 0* 4.0714 0.7355 3.8954
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 1.7824 1.5425 2.1112 1.5783 2.0534
##
## > CV[%]:
## --------
## Estimate CI LCL CI UCL One-Sided LCL One-Sided UCL
## total 6.6047 4.3327 13.7597 4.6283 12.0961
## G1_trt 3.6154 0* 7.5556 0* 7.0718
## G2_trt 0.4426 0* 0.8926 0* 0.8368
## G1_trt:G2_trt 0
## brotherPairID 0
## G1_trt:brotherPairID 4.6493 0* 6.7527 1.2199 6.4609
## G2_trt:brotherPairID 0
## G1_trt:G2_trt:brotherPairID 0
## error 2.9562 2.5584 3.5015 2.6177 3.4057
##
##
## 95% Confidence Level | CIs for negative VCs excluded | * CI-limits constrained to be >= 0
## SAS PROC MIXED method used for computing CIs
parmod <- lmer(data = PM_G1, BetaValue ~ meth.diff.parentals : Treatment + (1|CpGSite) + (1|brotherPairID))
## check normality of residuals assumption
qqnorm(resid(parmod))
qqline(resid(parmod))
pred <- ggpredict(parmod, terms = c("meth.diff.parentals", "Treatment"))
plot(pred, add.data = T)+
scale_color_manual(values = c("black", "red"))+
scale_y_continuous(name = "Beta values")+
scale_x_continuous(name = "Methylation difference between infected and control parents in percentage")+
ggtitle("Predicted methylation ratio (Beta) values in parents\n as a function of differential methylation between exposed and control groups")+
theme_bw()
modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2, REML = F) # REML =F for model comparison
mod_noG1trt <- lmer(BetaValue ~ G2_trt:hypohyper + (1|CpGSite)+ (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noG2trt <-lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noInteractions <- lmer(BetaValue ~ (G1_trt + G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noHypoHyper <- lmer(BetaValue ~ (G1_trt * G2_trt) + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
## check normality of residuals assumption
qqnorm(resid(modFull))
qqline(resid(modFull))
## Likelihood ratio tests for all variables:
lrtest(modFull, mod_noG1trt) # G1 trt is VERY VERY significant (LRT: χ² (4) = 1163.6, p < 0.001)
## Likelihood ratio test
##
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ G2_trt:hypohyper + (1 | CpGSite) + (1 | Sex) + (1 |
## brotherPairID)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 12 -1515719
## 2 8 -1516301 -4 1163.6 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noG2trt) # G2 trt is VERY VERY significant (LRT: χ² (4) = 30.02, p < 0.001) NB that changed when brotherpair is used instead of family!
## Likelihood ratio test
##
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ G1_trt:hypohyper + (1 | CpGSite) + (1 | Sex) + (1 |
## brotherPairID)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 12 -1515719
## 2 8 -1515734 -4 30.022 4.844e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noInteractions) # interactions are significant (LRT: χ² (2) = 9.21, p < 0.01)
## Likelihood ratio test
##
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ (G1_trt + G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 12 -1515719
## 2 10 -1515724 -2 9.211 0.009997 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lrtest(modFull, mod_noHypoHyper) # hypo/hyper VERY VERY significant (LRT: χ² (4) = 1140, p < 0.001)
## Likelihood ratio test
##
## Model 1: BetaValue ~ (G1_trt * G2_trt):hypohyper + (1 | CpGSite) + (1 |
## Sex) + (1 | brotherPairID)
## Model 2: BetaValue ~ (G1_trt * G2_trt) + (1 | CpGSite) + (1 | Sex) + (1 |
## brotherPairID)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 12 -1515719
## 2 8 -1516289 -4 1140.4 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Post-hoc tests between treatments
modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2)
modFull_emmeans <- emmeans(modFull, list(pairwise ~ (G1_trt:G2_trt):hypohyper), adjust = "tukey")
modFull_emmeans
## $`emmeans of G1_trt, G2_trt, hypohyper`
## G1_trt G2_trt hypohyper emmean SE df asymp.LCL asymp.UCL
## Control Control hypo 62.1 0.960 Inf 60.2 64.0
## Exposed Control hypo 58.8 0.959 Inf 57.0 60.7
## Control Exposed hypo 62.3 0.960 Inf 60.4 64.2
## Exposed Exposed hypo 58.9 0.960 Inf 57.1 60.8
## Control Control hyper 56.6 0.872 Inf 54.9 58.3
## Exposed Control hyper 58.7 0.872 Inf 57.0 60.4
## Control Exposed hyper 55.8 0.872 Inf 54.1 57.6
## Exposed Exposed hyper 58.6 0.872 Inf 56.9 60.3
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $`pairwise differences of G1_trt, G2_trt, hypohyper`
## contrast estimate SE df z.ratio
## Control Control hypo - Exposed Control hypo 3.2670 0.199 Inf 16.439
## Control Control hypo - Control Exposed hypo -0.2024 0.205 Inf -0.989
## Control Control hypo - Exposed Exposed hypo 3.1756 0.202 Inf 15.735
## Control Control hypo - Control Control hyper 5.5292 0.672 Inf 8.226
## Control Control hypo - Exposed Control hyper 3.3925 0.672 Inf 5.049
## Control Control hypo - Control Exposed hyper 6.2691 0.673 Inf 9.317
## Control Control hypo - Exposed Exposed hyper 3.5420 0.672 Inf 5.268
## Exposed Control hypo - Control Exposed hypo -3.4694 0.202 Inf -17.160
## Exposed Control hypo - Exposed Exposed hypo -0.0913 0.199 Inf -0.460
## Exposed Control hypo - Control Control hyper 2.2623 0.671 Inf 3.369
## Exposed Control hypo - Exposed Control hyper 0.1256 0.671 Inf 0.187
## Exposed Control hypo - Control Exposed hyper 3.0021 0.672 Inf 4.467
## Exposed Control hypo - Exposed Exposed hyper 0.2751 0.671 Inf 0.410
## Control Exposed hypo - Exposed Exposed hypo 3.3780 0.205 Inf 16.479
## Control Exposed hypo - Control Control hyper 5.7317 0.673 Inf 8.514
## Control Exposed hypo - Exposed Control hyper 3.5950 0.673 Inf 5.342
## Control Exposed hypo - Control Exposed hyper 6.4715 0.674 Inf 9.608
## Control Exposed hypo - Exposed Exposed hyper 3.7444 0.673 Inf 5.561
## Exposed Exposed hypo - Control Control hyper 2.3536 0.672 Inf 3.501
## Exposed Exposed hypo - Exposed Control hyper 0.2169 0.672 Inf 0.323
## Exposed Exposed hypo - Control Exposed hyper 3.0935 0.673 Inf 4.597
## Exposed Exposed hypo - Exposed Exposed hyper 0.3664 0.672 Inf 0.545
## Control Control hyper - Exposed Control hyper -2.1367 0.137 Inf -15.618
## Control Control hyper - Control Exposed hyper 0.7398 0.141 Inf 5.251
## Control Control hyper - Exposed Exposed hyper -1.9872 0.139 Inf -14.332
## Exposed Control hyper - Control Exposed hyper 2.8765 0.140 Inf 20.557
## Exposed Control hyper - Exposed Exposed hyper 0.1495 0.137 Inf 1.091
## Control Exposed hyper - Exposed Exposed hyper -2.7271 0.141 Inf -19.290
## p.value
## <.0001
## 0.9761
## <.0001
## <.0001
## <.0001
## <.0001
## <.0001
## <.0001
## 0.9998
## 0.0172
## 1.0000
## 0.0002
## 0.9999
## <.0001
## <.0001
## <.0001
## <.0001
## <.0001
## 0.0110
## 1.0000
## 0.0001
## 0.9994
## <.0001
## <.0001
## <.0001
## <.0001
## 0.9589
## <.0001
##
## Degrees-of-freedom method: asymptotic
## P value adjustment: tukey method for comparing a family of 8 estimates
P1 <- plot(modFull_emmeans, by = "hypohyper", comparisons = TRUE) +
# coord_flip()+
theme_bw() +
ggtitle("Estimated marginal means of methylation ratio (beta)\n of offspring at parental DMS")+
theme(legend.position = "none", axis.title.x = element_blank()) +
scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))
## NB: Comparison arrows: https://cran.r-project.org/web/packages/emmeans/vignettes/xplanations.html
## two estimated marginal means (EMMs) differ significantly if, and only if, their respective comparison arrows do not overlap
## These comparison arrows are decidedly not the same as confidence intervals.
## Confidence intervals for EMMs are based on the statistical properties of the individual EMMs, whereas comparison arrows
## are based on the statistical properties of differences of EMMs.
## Add the PARENTAL DMS value
## Same test on ALL, G1 and G2 fish
modFullG1 <- lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|brotherPairID), data = PM_G1)
modFullG1_emmeans <- emmeans(modFullG1, list(pairwise ~ G1_trt:hypohyper), adjust = "tukey")
modFullG1_emmeans
## $`emmeans of G1_trt, hypohyper`
## G1_trt hypohyper emmean SE df asymp.LCL asymp.UCL
## Control hypo 66.9 0.657 Inf 65.7 68.2
## Exposed hypo 49.1 0.661 Inf 47.8 50.4
## Control hyper 48.6 0.534 Inf 47.6 49.6
## Exposed hyper 65.8 0.536 Inf 64.7 66.8
##
## Degrees-of-freedom method: asymptotic
## Confidence level used: 0.95
##
## $`pairwise differences of G1_trt, hypohyper`
## 1 estimate SE df z.ratio p.value
## Control hypo - Exposed hypo 17.889 0.303 Inf 59.074 <.0001
## Control hypo - Control hyper 18.343 0.643 Inf 28.539 <.0001
## Control hypo - Exposed hyper 1.179 0.645 Inf 1.829 0.2597
## Exposed hypo - Control hyper 0.454 0.647 Inf 0.701 0.8966
## Exposed hypo - Exposed hyper -16.711 0.649 Inf -25.761 <.0001
## Control hyper - Exposed hyper -17.164 0.209 Inf -82.107 <.0001
##
## Degrees-of-freedom method: asymptotic
## P value adjustment: tukey method for comparing a family of 4 estimates
P2 <- plot(modFullG1_emmeans, by = "hypohyper", comparisons = TRUE) +
theme_bw() +
ggtitle("Estimated marginal means of methylation ratio (beta)\n of parents at DMS")+
theme(legend.position = "none", axis.title.x = element_blank()) +
scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))
ggarrange(P2, P1, labels = c("A", "B"), ncol = 1, nrow = 2)
length(unique(PM_G1$CpGSite))# 3648 positions
## [1] 3648
PM_G1 %>% dplyr::count(ID)## NB: not all covered in all samples
## ID n
## 1 S16 3300
## 2 S17 2842
## 3 S35 2747
## 4 S36 2568
## 5 S52 3037
## 6 S53 3468
## 7 S68 3348
## 8 S69 3500
## 9 S70 2237
## 10 S71 3302
## 11 S72 3479
## 12 S73 2973
## 13 S76 3239
## 14 S77 3232
## 15 S78 3434
## 16 S79 2679
## 17 S94 2564
## 18 S95 1766
## 19 S107 3414
## 20 S108 3230
## 21 S124 3387
## 22 S125 2526
## 23 S143 3097
## 24 S144 2773
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hypo"]))# 1176 positions hypomethylated upon parental inf
## [1] 1176
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hyper"]))# 2472 positions hypermethylated upon parental inf
## [1] 2472
myfun <- function(X){
## Calculate nbr of CpG hypo/hypermethylated per individual, and nbr of covered CpG:
X <- X %>% group_by(ID, Treatment, brotherPairID, clutch.ID, Sex) %>%
dplyr::summarise(ncov = n(),
hypoMeth = sum(BetaValue < 0.3),
hyperMeth = sum(BetaValue > 0.7)) %>% data.frame()
# Calculate residuals of nbr of methCpG by nbr of covered CpG
X$res_Nbr_methCpG_Nbr_coveredCpG_HYPO = residuals(lm(X$hypoMeth ~ X$ncov))
X$res_Nbr_methCpG_Nbr_coveredCpG_HYPER = residuals(lm(X$hyperMeth ~ X$ncov))
## Statistical model: is it different in each offspring trt group?
mod1 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
mod0 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
print(lrtest(mod1, mod0))
## Post-hoc test:
modhypo <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X)
## pairwise posthoc test
print(emmeans(modhypo, list(pairwise ~ Treatment), adjust = "tukey"))
mod3 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
mod4 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
print(lrtest(mod3, mod4))
## Post-hoc test:
modhyper <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X)
## pairwise posthoc test
print(emmeans(modhyper, list(pairwise ~ Treatment), adjust = "tukey"))
## Plot
phypo <- plot(ggpredict(modhypo, terms = c("Treatment")), add.data = TRUE)+
scale_y_continuous("Residuals of number of hypomethylated methylated \ncytosines on number of cytosines covered") +
ggtitle("Predicted residuals nbr of hypomethylated CpG")+
theme_bw()
phyper <- plot(ggpredict(modhyper, terms = c("Treatment")), add.data = TRUE)+
scale_y_continuous("Residuals of number of hypermethylated methylated \n cytosines on number of cytosines covered") +
ggtitle("Predicted residuals nbr of hypermethylated CpG")+
theme_bw()
return(list(phypo, phyper))
}
listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hypo",])
## Likelihood ratio test
##
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -420.75
## 2 5 -423.55 -3 5.605 0.1325
## $`emmeans of Treatment`
## Treatment emmean SE df lower.CL upper.CL
## NE_control -4.01 4.11 16.2 -12.72 4.70
## NE_exposed -5.64 4.14 16.3 -14.40 3.11
## E_control 7.45 4.12 16.1 -1.29 16.18
## E_exposed 4.61 4.12 16.1 -4.11 13.33
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $`pairwise differences of Treatment`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed 1.63 2.55 93.08 0.640 0.9188
## NE_control - E_control -11.46 5.83 8.47 -1.966 0.2722
## NE_control - E_exposed -8.62 5.82 8.47 -1.482 0.4875
## NE_exposed - E_control -13.09 5.86 8.53 -2.234 0.1893
## NE_exposed - E_exposed -10.25 5.85 8.53 -1.754 0.3559
## E_control - E_exposed 2.84 2.50 92.53 1.135 0.6692
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 4 estimates
##
## Likelihood ratio test
##
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -419.75
## 2 5 -422.58 -3 5.6616 0.1293
## $`emmeans of Treatment`
## Treatment emmean SE df lower.CL upper.CL
## NE_control 3.97 4.18 16.1 -4.88 12.82
## NE_exposed 5.61 4.20 16.2 -3.29 14.51
## E_control -7.52 4.19 16.0 -16.40 1.35
## E_exposed -4.53 4.18 16.1 -13.39 4.33
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $`pairwise differences of Treatment`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed -1.64 2.51 93.04 -0.651 0.9148
## NE_control - E_control 11.50 5.92 8.37 1.942 0.2815
## NE_control - E_exposed 8.50 5.91 8.37 1.438 0.5112
## NE_exposed - E_control 13.13 5.95 8.43 2.208 0.1971
## NE_exposed - E_exposed 10.14 5.94 8.43 1.707 0.3771
## E_control - E_exposed -3.00 2.47 92.51 -1.216 0.6185
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 4 estimates
## NOT significant
annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
top = text_grob("Parental DMS hypomethylated upon infection, in offspring"))
listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hyper",])
## Likelihood ratio test
##
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -476.63
## 2 5 -482.69 -3 12.123 0.006974 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $`emmeans of Treatment`
## Treatment emmean SE df lower.CL upper.CL
## NE_control 13.94 5.13 17.0 3.12 24.75
## NE_exposed 8.48 5.19 17.2 -2.46 19.42
## E_control -9.77 5.15 16.8 -20.66 1.11
## E_exposed -12.95 5.14 16.9 -23.79 -2.11
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $`pairwise differences of Treatment`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed 5.46 4.45 93.8 1.225 0.6124
## NE_control - E_control 23.71 7.28 10.3 3.257 0.0353
## NE_control - E_exposed 26.88 7.26 10.3 3.701 0.0172
## NE_exposed - E_control 18.25 7.36 10.5 2.481 0.1209
## NE_exposed - E_exposed 21.43 7.33 10.5 2.923 0.0598
## E_control - E_exposed 3.17 4.37 93.0 0.726 0.8864
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 4 estimates
##
## Likelihood ratio test
##
## Model 1: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## Model 2: res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1 | brotherPairID/clutch.ID) +
## (1 | Sex)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -477.24
## 2 5 -483.28 -3 12.081 0.00711 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $`emmeans of Treatment`
## Treatment emmean SE df lower.CL upper.CL
## NE_control -14.11 5.18 17.0 -25.04 -3.18
## NE_exposed -8.53 5.24 17.2 -19.58 2.52
## E_control 9.95 5.21 16.8 -1.05 20.95
## E_exposed 12.96 5.19 16.9 2.00 23.92
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $`pairwise differences of Treatment`
## 1 estimate SE df t.ratio p.value
## NE_control - NE_exposed -5.58 4.47 93.8 -1.247 0.5989
## NE_control - E_control -24.06 7.36 10.3 -3.269 0.0348
## NE_control - E_exposed -27.07 7.34 10.3 -3.687 0.0177
## NE_exposed - E_control -18.48 7.43 10.4 -2.486 0.1204
## NE_exposed - E_exposed -21.49 7.41 10.4 -2.901 0.0622
## E_control - E_exposed -3.01 4.39 93.0 -0.686 0.9021
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 4 estimates
## Treatment SIGNIFICANT in both excess hypo/hyper methylation **
# $`pairwise differences of Treatment`
## HYPO
# 1 estimate SE df t.ratio p.value
# NE_control - E_control 23.71 7.28 10.3 3.257 0.0353
# NE_control - E_exposed 26.88 7.26 10.3 3.701 0.0172
## HYPER
# 1 estimate SE df t.ratio p.value
# NE_control - E_control -24.06 7.36 10.3 -3.269 0.0348
# NE_control - E_exposed -27.07 7.34 10.3 -3.687 0.0177
annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
top = text_grob("Parental DMS hypermethylated upon infection, in offspring"))
-> The beta values in parentalDMS in offspring follow the parental pattern hypo/hyper methylated upon infection